# Start from an empty workspace: removes every object that ls() reports in the
# current environment. Attached packages and options() are left untouched.
# NOTE(review): when this file is source()d into an interactive session this
# also wipes the caller's objects — confirm that is intended.
rm(list = ls())
# Declare this script's location relative to the project root so that the
# here() calls below resolve against that root.
here::i_am(file.path("code", "20_cards_census_match_rates.R"))
library(here)
# Shared project setup. Presumably attaches the packages and defines the
# helpers this script uses without loading them itself (e.g. read_parquet,
# kable, tab_dir, linesep) — verify against config.R.
source(here("code", "config.R"))

# Denominator for the match rates. Only the eligibility flag column is read
# from the cards file; n_cards is the sum of that column (i.e. the number of
# eligible cards, assuming the flag is logical or 0/1 — TODO confirm).
cards <- read_parquet(
  here("data", "analysis", "cards.parquet"),
  col_select = c("eligible_for_match_census")
)
n_cards <- sum(cards[["eligible_for_match_census"]])

# Reshape the wide matches file (one column per link type besides card_id)
# into one row per card x link type, then drop every row that contains a
# missing value, so only cards that actually received a census_id remain.
census_matches <- drop_na(
  pivot_longer(
    read_parquet(here("data", "analysis", "cards_census_matches.parquet")),
    cols = -card_id,
    names_to = "match_type",
    values_to = "census_id"
  )
)

# Build the card-to-census match-rate table and write it to a LaTeX file.
#
# Each row of `census_matches` is one non-missing link, and `match_type` still
# holds the name of the wide column it came from: a link-type stem, one
# separator character, and a numeric suffix x (presumably names such as
# "standard_match_0" — the separator itself is not visible here). Links are
# counted per (stem, x) and divided by `n_cards` to get a proportion; the
# table reports the x = 0 and x = 2 columns side by side.
census_matches |>
  mutate(
    # Split the column name into its trailing number (x) and the stem in front
    # of the separator. Anchoring on the trailing digits instead of on fixed
    # character positions keeps this correct if x ever has more than one digit.
    # x is computed first because it needs the unmodified column name.
    x = as.integer(sub("^.*[^0-9]([0-9]+)$", "\\1", match_type)),
    match_type = sub(".[0-9]+$", "", match_type)
  ) |>
  group_by(match_type, x) |>
  summarize(
    n = n(),
    p = n / n_cards,
    # Drop the grouping here: otherwise the result stays grouped by match_type,
    # dplyr prints a regrouping message, and every later verb runs per group
    # (including the mutate() below that rewrites match_type itself).
    .groups = "drop"
  ) |>
  # One row per link type, with n_<x> / p_<x> columns for each value of x.
  pivot_wider(id_cols = match_type, names_from = x, values_from = c(n, p)) |>
  select(match_type, n_0, p_0, n_2, p_2) |>
  mutate(
    # The factor supplies both the display labels and the row order; stems not
    # listed in `levels` become NA and are removed by the filter() below.
    match_type = factor(
      match_type,
      levels = c("standard_match", "unique_mi_match", "unique_married_match",
                 "unique_prewar_match", "unique_vet_match", "unique_county_match", "unique_any_match"),
      labels = c("Standard", "Middle initial tie-breaker", "Marital status tie-breaker",
                 "Middle initial + marital", "Veteran status tie-breaker", "County tie-breaker", "All tie-breakers")
    ),
    # Pre-format as strings: thousands separators for counts, three decimals
    # for proportions.
    across(starts_with("n_"), \(v) formatC(v, big.mark = ",")),
    across(starts_with("p_"), \(v) sprintf("%.3f", v))
  ) |>
  filter(!is.na(match_type)) |>
  arrange(match_type) |>
  kable(
    col.names = linebreak(
      c("Link type", "Num.", "Prop.", "Num.", "Prop."),
      align = "c"
    ),
    align = c("l", "c", "c", "c", "c"),
    booktabs = TRUE,
    # linesep() is not defined in this file (presumably a config.R helper that
    # turns row-group sizes into LaTeX line separators — verify).
    linesep = linesep(c(1, 3, 3)),
    format = "latex"
  ) |>
  # Spanning header: blank over the label column, then one header per x value
  # covering its Num./Prop. pair. escape = FALSE keeps the $...$ math intact.
  add_header_above(c("", "$x$ = 0" = 2, "$x$ = 2" = 2), escape = FALSE) |>
  save_kable(file.path(tab_dir, "cards_census_match_rates.tex"))
